In [1]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the sales data; the CSV is expected to sit next to this notebook
df = pd.read_csv(filepath_or_buffer='sales.csv')

# Use seaborn's "whitegrid" look for every figure drawn below
sns.set(style='whitegrid')

# Function to display different charts
def display_all_charts(df):
    """Render fifteen exploratory charts for the sales data, one figure at a time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns read below: ``Region``,
        ``Average_Order_Value``, ``Purchase_Frequency``, ``Churn_Probability``,
        ``Lifetime_Value``, ``Time_Between_Purchases``, ``Retention_Strategy``,
        ``Launch_Date``, ``Season``, ``Most_Frequent_Category`` and
        ``Product_ID``. The frame is only read; it is never modified.

    Returns
    -------
    None
        Each chart is displayed with ``plt.show()``.
    """
    # Counted once and reused by the bar chart (8) and the pie chart (13).
    retention_counts = df['Retention_Strategy'].value_counts()

    # 1. Countplot: Distribution of Regions
    plt.figure(figsize=(10, 6))
    sns.countplot(x='Region', data=df, palette='Set1')
    plt.title('Distribution of Regions')
    plt.xlabel('Region')
    plt.ylabel('Count')
    plt.show()

    # 2. Barplot: Average Order Value by Region
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Region', y='Average_Order_Value', data=df, palette='Blues')
    plt.title('Average Order Value by Region')
    plt.xlabel('Region')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 3. Boxplot: Distribution of Purchase Frequency by Region
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Region', y='Purchase_Frequency', data=df, palette='Set2')
    plt.title('Distribution of Purchase Frequency by Region')
    plt.xlabel('Region')
    plt.ylabel('Purchase Frequency')
    plt.show()

    # 4. Heatmap: Correlation Matrix between Variables
    plt.figure(figsize=(10, 6))
    # Restrict to numeric columns first: DataFrame.corr() raises on text
    # columns (Region, Season, ...) in pandas >= 2.0.
    corr_matrix = df.select_dtypes(include='number').corr()
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('Correlation Matrix')
    plt.show()

    # 5. Violin Plot: Churn Probability by Region
    plt.figure(figsize=(10, 6))
    sns.violinplot(x='Region', y='Churn_Probability', data=df, palette='muted')
    plt.title('Churn Probability by Region')
    plt.xlabel('Region')
    plt.ylabel('Churn Probability')
    plt.show()

    # 6. Pairplot: Relationships between Numeric Variables
    # pairplot builds its own figure, so no plt.figure() call here.
    sns.pairplot(df[['Lifetime_Value', 'Average_Order_Value', 'Purchase_Frequency', 'Time_Between_Purchases']])
    plt.suptitle('Pairplot of Selected Numeric Variables', y=1.02)
    plt.show()

    # 7. Scatter Plot: Lifetime Value vs Average Order Value
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Lifetime_Value', y='Average_Order_Value', data=df, hue='Region', palette='Set2')
    plt.title('Lifetime Value vs Average Order Value')
    plt.xlabel('Lifetime Value (USD)')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 8. Barplot: Retention Strategy Count
    plt.figure(figsize=(10, 6))
    sns.barplot(x=retention_counts.index, y=retention_counts.values, palette='Set3')
    plt.title('Count of Retention Strategies')
    plt.xlabel('Retention Strategy')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.show()

    # 9. Line Plot: Average Order Value over Time (by Launch Date)
    # Parse the dates into a copy so the caller's DataFrame keeps its
    # original Launch_Date column and re-running the cell is idempotent.
    dated_df = df.assign(Launch_Date=pd.to_datetime(df['Launch_Date']))
    plt.figure(figsize=(10, 6))
    sns.lineplot(x='Launch_Date', y='Average_Order_Value', data=dated_df, marker='o')
    plt.title('Average Order Value over Time')
    plt.xlabel('Launch Date')
    plt.ylabel('Average Order Value (USD)')
    plt.xticks(rotation=45)
    plt.show()

    # 10. Histogram: Distribution of Lifetime Value
    plt.figure(figsize=(10, 6))
    if hasattr(sns, 'histplot'):
        # histplot replaces the deprecated distplot and plots counts,
        # which is what the 'Frequency' axis label describes.
        sns.histplot(data=df, x='Lifetime_Value', bins=30, color='skyblue', kde=True)
    else:
        # Older seaborn releases without histplot: keep the legacy call.
        sns.distplot(df['Lifetime_Value'], bins=30, color='skyblue', kde=True)
    plt.title('Distribution of Lifetime Value')
    plt.xlabel('Lifetime Value (USD)')
    plt.ylabel('Frequency')
    plt.show()

    # 11. Boxplot: Lifetime Value by Season
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Season', y='Lifetime_Value', data=df, palette='coolwarm')
    plt.title('Lifetime Value by Season')
    plt.xlabel('Season')
    plt.ylabel('Lifetime Value (USD)')
    plt.show()

    # 12. Barplot: Average Time Between Purchases by Product Category
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Most_Frequent_Category', y='Time_Between_Purchases', data=df, palette='muted')
    plt.title('Average Time Between Purchases by Product Category')
    plt.xlabel('Product Category')
    plt.ylabel('Average Time Between Purchases (days)')
    plt.xticks(rotation=45)
    plt.show()

    # 13. Pie Chart: Distribution of Customer Retention Strategies
    plt.figure(figsize=(8, 8))
    retention_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=sns.color_palette('Set3'))
    plt.title('Customer Retention Strategies')
    plt.ylabel('')  # drop the Series name pandas would otherwise print as the y label
    plt.show()

    # 14. Line Plot: Cumulative Lifetime Value by Product
    # The plotted value is the per-product sum of Lifetime_Value.
    df_grouped = df.groupby('Product_ID')['Lifetime_Value'].sum()
    plt.figure(figsize=(10, 6))
    sns.lineplot(x=df_grouped.index, y=df_grouped.values, color='orange', marker='o')
    plt.title('Cumulative Lifetime Value by Product')
    plt.xlabel('Product ID')
    plt.ylabel('Cumulative Lifetime Value (USD)')
    plt.show()

    # 15. FacetGrid: Time Between Purchases by Region (with scatter plot)
    # One panel per Region; FacetGrid manages its own figure.
    g = sns.FacetGrid(df, col="Region", height=6, aspect=1.5)
    g.map(sns.scatterplot, 'Lifetime_Value', 'Time_Between_Purchases', alpha=0.7)
    g.set_axis_labels('Lifetime Value (USD)', 'Time Between Purchases (days)')
    g.set_titles("{col_name}")
    plt.show()

# Render every chart for the sales data loaded above
display_all_charts(df=df)
In [2]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the sales data; the CSV is expected to sit next to this notebook
df = pd.read_csv(filepath_or_buffer='sales.csv')

# Use seaborn's "whitegrid" look for every figure drawn below
sns.set(style='whitegrid')

# Function to display different charts
def display_all_charts(df):
    """Render ten overview charts for the sales data, one figure at a time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns read below: ``Launch_Date``,
        ``Average_Order_Value``, ``Region``, ``Purchase_Frequency``,
        ``Lifetime_Value``, ``Time_Between_Purchases``, ``Retention_Strategy``,
        ``Churn_Probability`` and ``Product_ID``. The frame is only read;
        it is never modified.

    Returns
    -------
    None
        Each chart is displayed with ``plt.show()``.
    """
    # 1. Line Chart (e.g., Average Order Value over Time)
    plt.figure(figsize=(10, 6))
    # Group on parsed dates rather than the raw strings so the points are
    # ordered chronologically and drawn on a real time axis. The parsed
    # Series is local; df itself is left untouched.
    launch_dates = pd.to_datetime(df['Launch_Date'])
    df['Average_Order_Value'].groupby(launch_dates).mean().plot(kind='line')
    plt.title('Average Order Value over Time')
    plt.xlabel('Launch Date')
    plt.ylabel('Average Order Value (USD)')
    plt.xticks(rotation=45)
    plt.show()

    # 2. Bar Chart (e.g., Purchase Frequency by Region)
    # Bars show the per-region sum of Purchase_Frequency.
    plt.figure(figsize=(10, 6))
    df.groupby('Region')['Purchase_Frequency'].sum().plot(kind='bar', color='skyblue')
    plt.title('Purchase Frequency by Region')
    plt.xlabel('Region')
    plt.ylabel('Purchase Frequency')
    plt.xticks(rotation=45)
    plt.show()

    # 3. Scatter Plot (e.g., Lifetime Value vs. Average Order Value)
    plt.figure(figsize=(10, 6))
    plt.scatter(df['Lifetime_Value'], df['Average_Order_Value'], alpha=0.5, color='purple')
    plt.title('Lifetime Value vs. Average Order Value')
    plt.xlabel('Lifetime Value (USD)')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 4. Histogram (e.g., Distribution of Time Between Purchases)
    plt.figure(figsize=(10, 6))
    df['Time_Between_Purchases'].plot(kind='hist', bins=50, color='orange', edgecolor='black')
    plt.title('Distribution of Time Between Purchases')
    plt.xlabel('Time Between Purchases (days)')
    plt.ylabel('Frequency')
    plt.show()

    # 5. Boxplot (e.g., Distribution of Lifetime Value by Region)
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Region', y='Lifetime_Value', data=df, palette='Set2')
    plt.title('Distribution of Lifetime Value by Region')
    plt.xlabel('Region')
    plt.ylabel('Lifetime Value (USD)')
    plt.show()

    # 6. Pie Chart (e.g., Proportion of Customer Retention Strategies)
    plt.figure(figsize=(8, 8))
    retention_counts = df['Retention_Strategy'].value_counts()
    retention_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=sns.color_palette('Set3'))
    plt.title('Customer Retention Strategies')
    plt.ylabel('')  # drop the Series name pandas would otherwise print as the y label
    plt.show()

    # 7. Heatmap (e.g., Correlation Matrix between different numeric variables)
    plt.figure(figsize=(10, 6))
    # Restrict to numeric columns first: DataFrame.corr() raises on text
    # columns (Region, Retention_Strategy, ...) in pandas >= 2.0.
    corr_matrix = df.select_dtypes(include='number').corr()
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('Correlation Matrix')
    plt.show()

    # 8. Violin Plot (e.g., Churn Probability by Region)
    plt.figure(figsize=(10, 6))
    sns.violinplot(x='Region', y='Churn_Probability', data=df, palette='muted')
    plt.title('Churn Probability by Region')
    plt.xlabel('Region')
    plt.ylabel('Churn Probability')
    plt.show()

    # 9. Pairplot (e.g., Visualizing relationships between numeric variables)
    # pairplot builds its own figure, so no plt.figure() call here.
    sns.pairplot(df[['Lifetime_Value', 'Average_Order_Value', 'Purchase_Frequency', 'Time_Between_Purchases']])
    plt.suptitle('Pairplot of Selected Numeric Variables', y=1.02)
    plt.show()

    # 10. Area Plot (e.g., Cumulative Total Lifetime Value by Product)
    # Draw on an explicitly created figure, like every other chart here,
    # instead of depending on whichever figure happens to be current.
    plt.figure(figsize=(10, 6))
    lifetime_value_by_product = df.groupby('Product_ID')['Lifetime_Value'].sum()
    lifetime_value_by_product.plot(kind='area', color='lightcoral', alpha=0.6, ax=plt.gca())
    plt.title('Cumulative Lifetime Value by Product')
    plt.xlabel('Product ID')
    plt.ylabel('Cumulative Lifetime Value (USD)')
    plt.show()

# Render every chart for the sales data loaded above
display_all_charts(df=df)
In [3]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the sales data; the CSV is expected to sit next to this notebook
df = pd.read_csv(filepath_or_buffer='sales.csv')

# Use seaborn's "whitegrid" look for every figure drawn below
sns.set(style='whitegrid')

# Function to display different charts
def display_all_charts(df):
    """Render nine region- and season-focused charts for the sales data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns read below: ``Region``,
        ``Retention_Strategy``, ``Purchase_Frequency``,
        ``Time_Between_Purchases``, ``Average_Order_Value``,
        ``Churn_Probability``, ``Lifetime_Value`` and ``Season``.
        The frame is only read; it is never modified.

    Returns
    -------
    None
        Each chart is displayed with ``plt.show()``.
    """
    import math  # local import: only the radar chart needs it

    # 1. Stacked Bar Plot: Purchase Frequency by Region and Retention Strategy
    plt.figure(figsize=(10, 6))
    # Aggregate the Purchase_Frequency column itself. A bare crosstab only
    # counts rows, which does not match the title and axis label below.
    frequency_by_region = pd.crosstab(
        df['Region'],
        df['Retention_Strategy'],
        values=df['Purchase_Frequency'],
        aggfunc='sum',
    ).fillna(0)  # Region/strategy pairs with no rows contribute nothing to the stack
    frequency_by_region.plot(kind='bar', stacked=True, colormap='Paired', ax=plt.gca())
    plt.title('Purchase Frequency by Region and Retention Strategy')
    plt.xlabel('Region')
    plt.ylabel('Purchase Frequency')
    plt.xticks(rotation=45)
    plt.show()

    # 2. Scatter Plot: Time Between Purchases vs Average Order Value
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Time_Between_Purchases', y='Average_Order_Value', data=df, hue='Region', palette='viridis')
    plt.title('Time Between Purchases vs Average Order Value')
    plt.xlabel('Time Between Purchases (days)')
    plt.ylabel('Average Order Value (USD)')
    plt.show()

    # 3. Heatmap: Correlation of Numeric Variables (computed over all rows)
    plt.figure(figsize=(10, 6))
    # 'number' picks up every numeric dtype, not only float64/int64.
    numeric_df = df.select_dtypes(include='number')
    overall_corr = numeric_df.corr()

    # Plot the heatmap
    sns.heatmap(overall_corr, annot=True, cmap='YlGnBu', linewidths=0.5)
    plt.title('Correlation Matrix of Numeric Variables')
    plt.show()

    # 4. Bar Plot: Average Churn Probability by Region
    plt.figure(figsize=(10, 6))
    avg_churn_by_region = df.groupby('Region')['Churn_Probability'].mean()
    avg_churn_by_region.plot(kind='bar', color='lightblue', ax=plt.gca())
    plt.title('Average Churn Probability by Region')
    plt.xlabel('Region')
    plt.ylabel('Average Churn Probability')
    plt.show()

    # 5. Stripplot: Distribution of Time Between Purchases by Region
    plt.figure(figsize=(10, 6))
    sns.stripplot(x='Region', y='Time_Between_Purchases', data=df, jitter=True, palette='Set2')
    plt.title('Distribution of Time Between Purchases by Region')
    plt.xlabel('Region')
    plt.ylabel('Time Between Purchases (days)')
    plt.show()

    # 6. Pairplot: Lifetime Value vs Purchase Frequency vs Churn Probability
    # pairplot builds its own figure, so no plt.figure() call here.
    sns.pairplot(df[['Lifetime_Value', 'Purchase_Frequency', 'Churn_Probability']])
    plt.suptitle('Pairplot of Lifetime Value, Purchase Frequency, and Churn Probability', y=1.02)
    plt.show()

    # 7. Radar Chart: Comparison of Average Order Value, Lifetime Value, and Churn Probability by Region
    def radar_chart(series_by_name, labels, title):
        """Draw one closed polygon per entry of ``series_by_name`` on a polar axis.

        ``series_by_name`` maps a legend label to a list with one value per
        entry of ``labels``. The input lists are not modified.
        """
        spoke_count = len(labels)
        angles = [2 * math.pi * i / spoke_count for i in range(spoke_count)]
        # Repeat the first point so each outline closes; build new lists
        # instead of extending the caller's in place.
        closed_angles = angles + angles[:1]

        fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
        colors = sns.color_palette('Set2', n_colors=max(len(series_by_name), 1))
        for color, (name, values) in zip(colors, series_by_name.items()):
            closed_values = list(values) + list(values[:1])
            ax.fill(closed_angles, closed_values, color=color, alpha=0.25)
            ax.plot(closed_angles, closed_values, color=color, linewidth=2, label=str(name))
        ax.set_yticklabels([])
        ax.set_xticks(angles)
        ax.set_xticklabels(labels, fontsize=12)
        ax.set_title(title, size=14)
        if series_by_name:
            # Place the legend outside the circle so it does not cover a spoke.
            ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
        plt.show()

    radar_columns = ['Average_Order_Value', 'Lifetime_Value', 'Churn_Probability']
    region_means = df.groupby('Region')[radar_columns].mean()
    # The three metrics are on very different scales, so divide each by its
    # largest regional mean; every spoke then spans a comparable range.
    # A zero or missing maximum is replaced by 1 to avoid dividing by zero.
    axis_scale = region_means.abs().max().replace(0, 1).fillna(1)
    scaled_means = region_means / axis_scale

    radar_chart(
        {region: row.tolist() for region, row in scaled_means.iterrows()},
        ['Avg Order Value', 'Lifetime Value', 'Churn Probability'],
        'Region Comparison'
    )

    # 8. Histogram: Distribution of Purchase Frequency
    plt.figure(figsize=(10, 6))
    df['Purchase_Frequency'].plot(kind='hist', bins=30, color='green', edgecolor='black', ax=plt.gca())
    plt.title('Distribution of Purchase Frequency')
    plt.xlabel('Purchase Frequency')
    plt.ylabel('Frequency')
    plt.show()

    # 9. Boxplot: Time Between Purchases by Season
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Season', y='Time_Between_Purchases', data=df, palette='muted')
    plt.title('Time Between Purchases by Season')
    plt.xlabel('Season')
    plt.ylabel('Time Between Purchases (days)')
    plt.show()

# Render every chart for the sales data loaded above
display_all_charts(df=df)
In [ ]: